# Set the number of time steps
n_steps = 100
# Set the number of random walks
n_walks = 100
# Generate random walks with the same mean and variance
set.seed(123) # for reproducibility
cov_matrix = diag(n_walks)
cov_matrix[cov_matrix == 0] = 0.6
random_walks = mvrnorm(n = n_steps, mu = rep(0, n_walks), Sigma = cov_matrix)
random_walks = apply(random_walks, 2, cumsum)
intercepts = rnorm(n_walks,0,5)
for (i in 1:ncol(random_walks)){
random_walks[,i] = random_walks[,i] + intercepts[i]
}
# Create a data frame in wide format
wide_df = data.frame(matrix(random_walks, ncol = n_walks))
colnames(wide_df) = paste0("x", 1:n_walks)
wide_df$t = 1:n_steps
cov_matrix[c(1:10),c(1:10)]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 1.0 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6
## [2,] 0.6 1.0 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6
## [3,] 0.6 0.6 1.0 0.6 0.6 0.6 0.6 0.6 0.6 0.6
## [4,] 0.6 0.6 0.6 1.0 0.6 0.6 0.6 0.6 0.6 0.6
## [5,] 0.6 0.6 0.6 0.6 1.0 0.6 0.6 0.6 0.6 0.6
## [6,] 0.6 0.6 0.6 0.6 0.6 1.0 0.6 0.6 0.6 0.6
## [7,] 0.6 0.6 0.6 0.6 0.6 0.6 1.0 0.6 0.6 0.6
## [8,] 0.6 0.6 0.6 0.6 0.6 0.6 0.6 1.0 0.6 0.6
## [9,] 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 1.0 0.6
## [10,] 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 1.0
# Plot the time series
matplot(as.matrix(wide_df[,-ncol(wide_df)]), type = "l", xlab = "t", ylab = "Value", main = "100 Time Series - Multivariate Random Walks")
# Convert to long format using tidyr
### Pivot
long_df = wide_df %>%
pivot_longer(cols = starts_with("x"),
names_to = "level",
values_to = "value")
# Print the long format data frame
print(long_df)
## # A tibble: 10,000 × 3
## t level value
## <int> <chr> <dbl>
## 1 1 x1 12.2
## 2 1 x2 -0.704
## 3 1 x3 3.58
## 4 1 x4 -2.02
## 5 1 x5 1.72
## 6 1 x6 6.88
## 7 1 x7 6.08
## 8 1 x8 -1.13
## 9 1 x9 -7.21
## 10 1 x10 -0.670
## # ℹ 9,990 more rows
# Number of knots
n_knots = 10
# Define the piecewise linear function
piecewise_linear = function(x, k) {
y = x - k
y = ifelse(x < 0, 0, y)
y = ifelse(x < k, 0, y)
return(y)
}
# Define a function to create a linear spline basis
create_basis = function(x, k) {
num_x = length(x)
num_k = length(k)
b = matrix(0, nrow = num_x, ncol = num_k)
for (i in 1:num_k) {
b_i = piecewise_linear(x, k[i])
b[, i] = b_i
}
return(b)
}
# Example usage
x = seq(1, n_steps, by = 1) # Sample x values
k_values = c(0:n_knots)*(n_steps/n_knots) # Values of k
basis_matrix_wide = create_basis(x, k_values)
basis_matrix_wide = basis_matrix_wide/max(basis_matrix_wide)
colnames(basis_matrix_wide) = paste0("basis_X", c(1:11))
head(basis_matrix_wide)
## basis_X1 basis_X2 basis_X3 basis_X4 basis_X5 basis_X6 basis_X7 basis_X8
## [1,] 0.01 0 0 0 0 0 0 0
## [2,] 0.02 0 0 0 0 0 0 0
## [3,] 0.03 0 0 0 0 0 0 0
## [4,] 0.04 0 0 0 0 0 0 0
## [5,] 0.05 0 0 0 0 0 0 0
## [6,] 0.06 0 0 0 0 0 0 0
## basis_X9 basis_X10 basis_X11
## [1,] 0 0 0
## [2,] 0 0 0
## [3,] 0 0 0
## [4,] 0 0 0
## [5,] 0 0 0
## [6,] 0 0 0
plot(basis_matrix_wide[,1], type="lines", col="blue")
lines(basis_matrix_wide[,2], col="red")
lines(basis_matrix_wide[,3], col="green")
lines(basis_matrix_wide[,4], col="purple")
lines(basis_matrix_wide[,5], col="red")
lines(basis_matrix_wide[,6], col="green")
lines(basis_matrix_wide[,7], col="purple")
lines(basis_matrix_wide[,8], col="red")
lines(basis_matrix_wide[,9], col="green")
lines(basis_matrix_wide[,10], col="purple")
## Merge with the long format data
t = c(1:n_steps)
basis_matrix_wide = cbind(t, basis_matrix_wide)
basis_matrix_wide = as.data.frame(basis_matrix_wide)
## Merge the dataframes
long_df = long_df %>% left_join(basis_matrix_wide, by='t')
long_df
## # A tibble: 10,000 × 14
## t level value basis_X1 basis_X2 basis_X3 basis_X4 basis_X5 basis_X6
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 x1 12.2 0.01 0 0 0 0 0
## 2 1 x2 -0.704 0.01 0 0 0 0 0
## 3 1 x3 3.58 0.01 0 0 0 0 0
## 4 1 x4 -2.02 0.01 0 0 0 0 0
## 5 1 x5 1.72 0.01 0 0 0 0 0
## 6 1 x6 6.88 0.01 0 0 0 0 0
## 7 1 x7 6.08 0.01 0 0 0 0 0
## 8 1 x8 -1.13 0.01 0 0 0 0 0
## 9 1 x9 -7.21 0.01 0 0 0 0 0
## 10 1 x10 -0.670 0.01 0 0 0 0 0
## # ℹ 9,990 more rows
## # ℹ 5 more variables: basis_X7 <dbl>, basis_X8 <dbl>, basis_X9 <dbl>,
## # basis_X10 <dbl>, basis_X11 <dbl>
# Now try filtering out all but a handful of data points from specific series and then retrain the model, predict again.
# Keep only 3 data points at x10 and retrain
drop = which(long_df$level == "x10")
drop = drop[-c(5, 25, 91)]
sample_df = long_df[-drop,]
create_formula = function(num_basis, target = "value"){
f = paste0(target, " ~ (1 | level) + ")
for (i in 1:num_basis){
f = paste0(f, "(basis_X",i, " - 1 | level) + basis_X",i)
if (i < num_basis){
f = paste0(f, " + ")
}
}
f = as.formula(f)
return(f)
}
f = create_formula(10)
lm_fit = lmer(f, data=sample_df)
y_p_train = predict(lm_fit)
sample_df$y_p = y_p_train
for (x in unique(sample_df$level)){
x1 = sample_df %>% filter(level == x)
plot(x1$value)
lines(x1$y_p, col="red")
}
# out of sample
y_p_test = predict(lm_fit, newdata=long_df)
long_df$y_p = y_p_test
x1 = long_df %>% filter(level == 'x10')
print(x)
## [1] "x10"
plot(x1$value)
lines(x1$y_p, col="red")
rf = ranef(lm_fit)
fe = fixef(lm_fit)
hist(rf$level$basis_X1)
fe
## (Intercept) basis_X1 basis_X2 basis_X3 basis_X4 basis_X5
## 0.3875783 -22.8253528 22.2025734 37.7532642 -72.0846254 40.7230919
## basis_X6 basis_X7 basis_X8 basis_X9 basis_X10
## -22.2477729 20.1480650 10.5205851 -36.6929338 -23.3739660
drop = which(long_df$level == "x10")
drop = drop[-c(5, 25,45, 60, 90)]
sample_df = long_df[-drop,]
f = create_formula(10)
lm_fit = lmer(f, data=sample_df)
y_p_train = predict(lm_fit)
sample_df$y_p = y_p_train
y_p_test = predict(lm_fit, newdata=long_df)
long_df$y_p = y_p_test
x1 = long_df %>% filter(level == 'x10')
print(x)
## [1] "x10"
plot(x1$value)
lines(x1$y_p, col="red")
drop = which(long_df$level == "x10")
drop = drop[-c(5, 25,45, 60,84, 90)]
sample_df = long_df[-drop,]
f = create_formula(10)
lm_fit = lmer(f, data=sample_df)
y_p_train = predict(lm_fit)
sample_df$y_p = y_p_train
y_p_test = predict(lm_fit, newdata=long_df)
long_df$y_p = y_p_test
x1 = long_df %>% filter(level == 'x10')
print(x)
## [1] "x10"
plot(x1$value)
lines(x1$y_p, col="red")